---
title: "Replication for: The Long-Run Effects of Religious Persecution: Evidence from the Spanish Inquisition"
subtitle: "GOV 2001"
author: "Noah Dasanaike and Luchuan Deng"
output: pdf_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)

library(tidyverse)
library(mapSpain)
library(haven)
library(sf)
library(ggspatial)
library(ggthemes)
library(stargazer)
library(binsreg)
library(stringr)
library(sandwich)
library(lmtest)
library(cem)
library(MatchIt)
```

```{r, include = FALSE}
# read in original data set
data <- read_dta("Inquisition_analysis_dataset.dta")

# read in shapefiles from mapSpain library
munic <- esp_get_munic() %>% 
  mutate(name = str_replace_all(name, " / ", "/"))

# create map geometry for later figures
merge <- data %>% 
  select(muni_name, adj_impact) %>%
  rename(name = muni_name) %>% 
  inner_join(munic %>% filter(!ine.ccaa.name %in% c("Canarias")) %>% select(name, geometry))
```

```{r}
# create province outlines for figure 1 map 

province <- munic %>% 
  group_by(ine.ccaa.name) %>%
  filter(!ine.ccaa.name %in% c("Canarias")) %>% 
  summarize(geometry = st_union(geometry))
```

\newpage

```{r, warning = FALSE, message = FALSE, fig.show = "hold", out.width = "50%"}
#### FIGURE 1/A1

figure_one <- merge %>%
  ggplot() +
  geom_sf(aes(geometry = geometry, fill = cut(adj_impact, 
                                              breaks = c(-Inf, 0, .025, .05, .075, .1, 1))), 
          lty = .1) +
  scale_fill_brewer("Inquisition Intensity", palette = "Blues") +
  geom_sf(data = province, aes(geometry = geometry), alpha = 0, color = "black") +
   annotation_scale(location = "bl", width_hint = 0.5) +
    annotation_north_arrow(location = "bl", which_north = "true", pad_y = unit(.5, "in"),
        style = north_arrow_fancy_orienteering) +
  theme_map() +
  theme(legend.justification = c(1, 0), legend.position = c(1, 0),
        legend.text = element_text(size = 7),
        legend.key.size = unit(.5, 'cm'),
        legend.title = element_text(size = 9))

figure_one

# ggsave(plot = figure_one, "figure1.png", device = "png")
```

\newpage

```{r, results = "asis", warning = FALSE, message = FALSE}
#### TABLE 1
library(sandwich)
library(lmtest)

# run each of the respective regressions, then get the robust standard errors and p-values 

# regression on logged gdp

table_one_a <- lm(log_gdppc ~ adj_impact + log_pop + latitude + longitude + rugged + 
                    dist_trib + dist_river + dist_road + dist_sea + civil_married + 
                    class_upper + age + as.factor(autonomia), data %>% filter(trib_head == 0))

table_one_a_stat <- coeftest(table_one_a, vcov = vcovHC(table_one_a, type="HC1"))[,3]
table_one_a_p  <- coeftest(table_one_a, vcov = vcovHC(table_one_a, type="HC1"))[,4]

# regression on religiosity

table_one_b <- lm(religious ~ adj_impact + log_pop + latitude + longitude + rugged + 
                    dist_trib + dist_river + dist_road + dist_sea + civil_married + 
                    class_upper + age + as.factor(autonomia), data %>% filter(trib_head == 0))

table_one_b_stat <- coeftest(table_one_b, vcov = vcovHC(table_one_b, type="HC1"))[,3]
table_one_b_p  <- coeftest(table_one_b, vcov = vcovHC(table_one_b, type="HC1"))[,4]

# regression on education

table_one_c <- lm(c_secondplus ~ adj_impact + log_pop + latitude + longitude + rugged + 
                    dist_trib + dist_river + dist_road + dist_sea + civil_married + 
                    class_upper + age + as.factor(autonomia), data %>% filter(trib_head == 0)) 

table_one_c_stat <- coeftest(table_one_c, vcov = vcovHC(table_one_c, type="HC1"))[,3]
table_one_c_p  <- coeftest(table_one_c, vcov = vcovHC(table_one_c, type="HC1"))[,4]

# regression on social trust

table_one_d <- lm(trust2 ~ adj_impact + log_pop + latitude + longitude + rugged + 
                    dist_trib + dist_river + dist_road + dist_sea + civil_married + 
                    class_upper + age + as.factor(autonomia), data %>% filter(trib_head == 0)) 

table_one_d_stat <- coeftest(table_one_d, vcov = vcovHC(table_one_d, type="HC1"))[,3]
table_one_d_p  <- coeftest(table_one_d, vcov = vcovHC(table_one_d, type="HC1"))[,4]

# create stargazer plot with robust standard errors 

star = stargazer(table_one_a, table_one_b, table_one_c, table_one_d, type = "latex", 
                 se = list(table_one_a_stat, table_one_b_stat, table_one_c_stat, table_one_d_stat), 
                 p = list(table_one_a_p, table_one_b_p, table_one_c_p, table_one_d_p),
          header = FALSE, title = "",
          omit = "autonomia",
          dep.var.labels = c("log GDP p.c.", "religiosity", "education", "trust"),
          covariate.labels = c("inquisitorial intensity", "log population", "latitude",
                               "longitude", "ruggedness", "distance to tribunal",
                               "distance to river", "distance to road", "distance to sea",
                               "share married", "share upper class", "average age",
                               "constant"),
          add.lines = list(c("regional FE", "Yes", "Yes", "Yes", "Yes")),
          omit.stat = c("adj.rsq", "ser", "f"))

star = sub('^.+\\caption.+$','', star)

# knitr::include_graphics(path = c("original/table1.png"))
```

\newpage

```{r, warning = FALSE, message = FALSE, , fig.show = "hide"}
#### FIGURE 2

# create 20 data bins as in the original paper

fig_2_data <- binsreg(data$adj_impact, data$log_den_rel_pre,
        nbins = 20)$data.plot[[1]][["data.dots"]]
```

```{r, warning = FALSE, message = FALSE, , fig.show = "hold", out.width = "50%"}
figure_two <- fig_2_data %>% 
  ggplot(aes(x, fit)) +
  geom_point(shape = 1, color = "navy") + 
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE,
              lty = "dashed", color = "coral3") +
  labs(x = "log of density of pre-1480 religious figures", 
       y = "inquisitorial intensity") + 
  theme(axis.line = element_line(colour = "black"),
        axis.text.x = element_text(color = "black"),
        axis.text.y = element_text(color = "black")) +
  scale_y_continuous(breaks = c(.01, .015, .02, .025, .03),
                     limits = c(.01, .03),
                     labels = scales::label_number()) +
  scale_x_continuous(breaks = c(-5:-1),
                     limits = c(-5, -1)) +
  theme_bw()

figure_two

# ggsave(plot = figure_two, "figure2.png", device = "png")

# knitr::include_graphics(path = c("figure2.png", "original/orig_figure3.png"))
```
\newpage

EXTENSION

\newpage

```{r, include = FALSE}
#### EXTENSION DATA

# read in data on historical regions (original) and agriculture

historical_regions <- readxl::read_xlsx("historical_provinces.xlsx") %>% 
  mutate(historical_capital_long = as.numeric(historical_capital_long))

historical_regions <- st_as_sf(historical_regions, coords = 
                                                         c("historical_capital_long", 
                                                           "historical_capital_lat"), crs = 4386) %>% 
  rename(historical_region_geom = geometry)

agriculture <- readxl::read_xls("agri.xls") %>%
  mutate(used_agricultural_area = lag(used_agricultural_area),
         total_livestock = lag(total_livestock),
         agricultural_holdings = lag(agricultural_holdings)) %>%
  rowwise() %>% 
  mutate(municipality = str_split(municipality, " ")[[1]][1]) %>% 
  head(-6) %>% 
  filter(municipality != "2009") %>% 
  mutate(municipality = as.numeric(municipality)) %>% 
  rename(preferred_inecode = municipality)

# merge this data into a modified data set with new covariates

data_modified <- data %>% 
  left_join(historical_regions) %>% 
  left_join(agriculture) %>% 
  rename(name = muni_name) %>% 
  left_join(munic %>% select(ine.ccaa.name, name)) %>% 
  mutate(density = pop_padron / area) %>% 
  rename(communities = ine.ccaa.name)
```

```{r}
# code for calculating the distance between each municipality and the capital of madrid
# read in pre-saved data from earlier run

data_modified$geometry_transformed <- st_set_crs(data_modified$geometry, 4386)
# 
# data_modified$capital_dist <- NA
# capital <- st_as_sf(tibble(lat = c(40.416667), long = c(-3.716667)), coords = c("long", "lat"), crs = 4386)
# 
# for (i in 1:nrow(data_modified)){
#   data_modified$capital_dist[i] <- st_distance(data_modified$geometry_transformed[i],
#                         capital)
# }
# 
# city_distances <- data_modified %>%
#   select(munisid, capital_dist)
# 
# write.csv(city_distances, "city_distances.csv")

city_distances <- read_csv("city_distances.csv") %>%
  select(-c(X1, regional_city_dist)) %>% 
  group_by(munisid) %>% 
  summarize(capital_dist = min(capital_dist))

data_modified <- data_modified %>%
  left_join(city_distances, by = "munisid")
```

```{r}
# code for calculating the distance between each municipality and ecah of the historic mosques
# read in pre-saved data from earlier run

# mosques <- readxl::read_xlsx("mosque locations.xlsx") %>% 
#   mutate(former_mosque_long = as.numeric(former_mosque_long))
# 
# mosques <- st_as_sf(mosques, coords = c("former_mosque_long", "former_mosque_lat"), crs = 4386) %>%
#   rename(historical_mosque_geom = geometry)
# 
# data_modified$historical_mosque_dist <- NA
# 
# for (i in 1:nrow(data_modified)){
#   distances <- st_distance(data_modified$geometry_transformed[i], mosques)
# 
#   data_modified$historical_mosque_dist[i] <- min(as.vector(distances))
# }
# 
# mosque_distances <- data_modified %>%
#   select(munisid, historical_mosque_dist)
# 
# write.csv(mosque_distances, "mosque_distances.csv")

mosque_distances <- read_csv("mosque_distances.csv") %>%
  select(-c(X1)) %>% 
  group_by(munisid) %>% 
  summarize(historical_mosque_dist = min(historical_mosque_dist))

data_modified <- data_modified %>%
  left_join(mosque_distances)
```

```{r}
# code for calculating the distance between each municipality and the regional centres, then save closest centre distance
# read in pre-saved data from earlier run

# regional_centre <- data_modified %>%
#   select(name, geometry_transformed, division_1833, pop_padron) %>%
#   group_by(division_1833) %>%
#   summarize(regional_centre = geometry_transformed[which.max(pop_padron)])
# 
# data_modified$regional_centre_dist <- NA
# 
# for (i in 1:nrow(data_modified)){
#   distance <- st_distance(data_modified$geometry_transformed[i],
#                            regional_centre$regional_centre)
# 
#   data_modified$regional_centre_dist[i] <- min(as.vector(distance))
# }
# 
# regional_distances <- data_modified %>%
#   select(munisid, regional_centre_dist)
# 
# write.csv(regional_distances, "regional_distances.csv")

regional_distances <- read_csv("regional_distances.csv") %>%
  group_by(munisid) %>%
  summarize(regional_centre_dist = min(regional_centre_dist))

data_modified <- data_modified %>%
  left_join(regional_distances)
```

```{r}
# code for calculating the distance between each municipality and andorra
# read in pre-saved data from earlier run

# andorra <- st_as_sf(tibble(lat = c(42.5), long = c(1.516667)),
#                     coords = c("long", "lat"), crs = 4386)
# 
# data_modified$andorra_distance <- NA
# 
# for (i in 1:nrow(data_modified)){
#   distance <- st_distance(data_modified$geometry_transformed[i],
#                            andorra)
# 
#   data_modified$andorra_distance[i] <- max(as.vector(distance))
# }
# 
# andorra_distance <- data_modified %>%
#   select(munisid, andorra_distance)
# 
# write.csv(andorra_distance, "andorra_distance.csv")

andorra_distance <- read_csv("andorra_distance.csv") %>%
  group_by(munisid) %>%
  summarize(andorra_distance = min(andorra_distance))

data_modified <- data_modified %>%
  left_join(andorra_distance)
```

```{r}
# code for calculating the distance between each municipality and tangier
# read in pre-saved data from earlier run

# tangier <- st_as_sf(tibble(lat = c(35.776667), long = c(-5.803889)), coords = c("long", "lat"), crs = 4386)
# 
# data_modified$tangier_distance <- NA
# 
# for (i in 1:nrow(data_modified)){
#   distance <- st_distance(data_modified$geometry_transformed[i],
#                            tangier)
# 
#   data_modified$tangier_distance[i] <- max(as.vector(distance))
# }
# 
# tangier_distance <- data_modified %>%
#   select(munisid, tangier_distance)
# 
# write.csv(tangier_distance, "tangier_distance.csv")

tangier_distance <- read_csv("tangier_distance.csv") %>%
  group_by(munisid) %>%
  summarize(tangier_distance = min(tangier_distance))

data_modified <- data_modified %>%
  left_join(tangier_distance)
```

```{r}
# calculate north of madrid variable using longitude coordinate for the middle of spain

data_modified$north_of_madrid <- NA

for (i in 1:nrow(data_modified)){
  data_modified$north_of_madrid[i] <- 
    ifelse(st_is_empty(data_modified$geometry_transformed[i]), NA,
           ifelse(mean(st_coordinates(data_modified$geometry_transformed[i])[,2]) > 40.4168, 1, 0))
}
```

```{r}
# convert each distance from miles to thousands of miles

data_modified$regional_centre_dist_km <- data_modified$regional_centre_dist / 10000
data_modified$historical_mosque_dist_km <- data_modified$historical_mosque_dist / 10000
data_modified$capital_dist_km <- data_modified$capital_dist / 10000
data_modified$tangier_distance_km <- data_modified$tangier_distance / 10000
data_modified$andorra_distance_km <- data_modified$andorra_distance / 10000
```

\newpage

```{r}
# create figure 3

hist_prov <- data_modified %>% 
  group_by(division_1833) %>%
  summarize(geometry = st_union(geometry))

merge %>%
  ggplot() +
  geom_sf(data = province, alpha = 0, color = "red") +
  geom_sf(data = hist_prov, aes(geometry = geometry), alpha = 0, color = "black") +
  theme_map()
```

\newpage

```{r}
# check for model mis-specification: table 2

set.seed(123)

library(RobustSE)

table_one_a_glm <- glm(log_gdppc ~ adj_impact + log_pop + latitude + longitude + rugged + 
                           dist_trib + dist_river + dist_road + dist_sea + civil_married + 
                           class_upper + age + as.factor(autonomia), data %>% filter(trib_head == 0),
                       family = gaussian(link="identity"))

table_one_b_glm <- glm(religious ~ adj_impact + log_pop + latitude + longitude + rugged + 
                    dist_trib + dist_river + dist_road + dist_sea + civil_married + 
                    class_upper + age + as.factor(autonomia), data %>% filter(trib_head == 0),
                       family = gaussian(link="identity"))

table_one_c_glm <- glm(c_secondplus ~ adj_impact + log_pop + latitude + longitude + rugged + 
                    dist_trib + dist_river + dist_road + dist_sea + civil_married + 
                    class_upper + age + as.factor(autonomia), data %>% filter(trib_head == 0),
                       family = gaussian(link="identity")) 

table_one_d_glm <- glm(trust2 ~ adj_impact + log_pop + latitude + longitude + rugged + 
                    dist_trib + dist_river + dist_road + dist_sea + civil_married + 
                    class_upper + age + as.factor(autonomia), data %>% filter(trib_head == 0),
                       family = gaussian(link="identity")) 

GIM(table_one_a_glm, full = TRUE, 5, 5)
GIM(table_one_b_glm, full = TRUE, 5, 5)
GIM(table_one_c_glm, full = TRUE, 5, 5)
GIM(table_one_d_glm, full = TRUE, 5, 5)
```

\newpage

```{r}
# summary statistics on adjusted impact and adjusted trials 
# figure 4

data %>% 
  ggplot() +
  geom_density(aes(adj_impact)) +
  theme_bw() +
  labs(x = "Inquisition Intensity (adj_impact)",
       y = "Density")

# figure 5

data %>% 
  ggplot() +
  geom_density(aes(adj_trials)) +
  theme_bw() +
  labs(x = "Trial Frequency (adj_trials)",
       y = "Density")

summary(data$adj_impact)
summary(data$adj_trials)

sum(data$adj_impact > .5)
sum(data$adj_impact > 0)
sum(!is.na(data$adj_impact))

# figure 6

data %>% 
  filter(adj_impact > 0) %>%
  ggplot() +
  geom_density(aes(asinh(adj_impact))) +
  theme_bw() +
  labs(x = "Inquisition Intensity (adj_impact), inverse hyperbolic sine transformation",
       y = "Density")
```

\newpage

```{r, results = "asis", warning = FALSE, message = FALSE}
# table 3

# run each of the respective regressions, then get the robust standard errors and p-values 

# regression on logged gdp

table_three_a <- lm(log_gdppc ~ asinh(adj_impact) + log_pop + latitude + longitude + rugged + 
                    dist_trib + dist_river + dist_road + dist_sea + civil_married + 
                    class_upper + age + as.factor(autonomia), data %>% filter(trib_head == 0))

table_three_a_stat <- coeftest(table_three_a, vcov = vcovHC(table_three_a, type="HC1"))[,3]
table_three_a_p  <- coeftest(table_three_a, vcov = vcovHC(table_three_a, type="HC1"))[,4]

# regression on religiosity

table_three_b <- lm(religious ~ asinh(adj_impact)  + log_pop + latitude + longitude + rugged + 
                    dist_trib + dist_river + dist_road + dist_sea + civil_married + 
                    class_upper + age + as.factor(autonomia), data %>% filter(trib_head == 0))

table_three_b_stat <- coeftest(table_three_b, vcov = vcovHC(table_three_b, type="HC1"))[,3]
table_three_b_p  <- coeftest(table_three_b, vcov = vcovHC(table_three_b, type="HC1"))[,4]

# regression on education

table_three_c <- lm(c_secondplus ~ asinh(adj_impact)  + log_pop + latitude + longitude + rugged + 
                    dist_trib + dist_river + dist_road + dist_sea + civil_married + 
                    class_upper + age + as.factor(autonomia), data %>% filter(trib_head == 0)) 

table_three_c_stat <- coeftest(table_three_c, vcov = vcovHC(table_three_c, type="HC1"))[,3]
table_three_c_p  <- coeftest(table_three_c, vcov = vcovHC(table_three_c, type="HC1"))[,4]

# regression on social trust

table_three_d <- lm(trust2 ~ asinh(adj_impact)  + log_pop + latitude + longitude + rugged + 
                    dist_trib + dist_river + dist_road + dist_sea + civil_married + 
                    class_upper + age + as.factor(autonomia), data %>% filter(trib_head == 0)) 

table_three_d_stat <- coeftest(table_three_d, vcov = vcovHC(table_three_d, type="HC1"))[,3]
table_three_d_p  <- coeftest(table_three_d, vcov = vcovHC(table_three_d, type="HC1"))[,4]

# create stargazer plot with robust standard errors 

star = stargazer(table_three_a, table_three_b, table_three_c, table_three_d, type = "latex", 
                 se = list(table_three_a_stat, table_three_b_stat, table_three_c_stat, table_three_d_stat), 
                 p = list(table_three_a_p, table_three_b_p, table_three_c_p, table_three_d_p),
          header = FALSE, title = "",
          omit = "autonomia",
          dep.var.labels = c("log GDP p.c.", "religiosity", "education", "trust"),
          covariate.labels = c("inquisitorial intensity", "log population", "latitude",
                               "longitude", "ruggedness", "distance to tribunal",
                               "distance to river", "distance to road", "distance to sea",
                               "share married", "share upper class", "average age",
                               "constant"),
          add.lines = list(c("regional FE", "Yes", "Yes", "Yes", "Yes")),
          omit.stat = c("adj.rsq", "ser", "f"))

star = sub('^.+\\caption.+$','', star)
```

\newpage

```{r}
# create figure 7 with historical province divisions

division_1833 <- data_modified %>% 
  group_by(division_1833) %>%
  summarize(geometry = st_union(geometry))

data_modified %>%
  mutate(regional_centre = ifelse(regional_centre_dist_km == 0, TRUE, FALSE)) %>% 
  ggplot() +
  geom_sf(aes(geometry = geometry, fill = regional_centre_dist_km), 
          lty = .1)  +
  scale_fill_continuous(name = "Distance from Regional Centre (thousand miles)") +
  geom_sf(aes(geometry = geometry, alpha = regional_centre),
          fill = "red", 
          lty = .1, show.legend = F) +
  geom_sf(data = division_1833, aes(geometry = geometry), alpha = 0, color = "black") +
  annotation_scale(location = "bl", width_hint = 0.5) +
  theme_map() +
  theme(legend.justification = c(1, 0), legend.position = c(1, 0),
        legend.text = element_text(size = 7),
        legend.key.size = unit(.5, 'cm'),
        legend.title = element_text(size = 9))
```

\newpage

```{r, results = "asis"}
# table 4, using the asinh transformation on adj_impact

table_four_a_modified <- lm(log_gdppc ~ asinh(adj_impact) + log_pop + rugged + area + density +
             north_of_madrid + regional_centre_dist_km + historical_mosque_dist_km + capital_dist_km + 
             tangier_distance_km + andorra_distance_km + dist_trib + dist_river + 
             dist_road + dist_sea + civil_married + class_upper + age + 
           as.factor(division_1833), 
                    data_modified %>% filter(trib_head == 0))

table_four_a_stat <- coeftest(table_four_a_modified, vcov = vcovHC(table_four_a_modified, type="HC1"))[,3]
table_four_a_p  <- coeftest(table_four_a_modified, vcov = vcovHC(table_four_a_modified, type="HC1"))[,4]

table_four_b_modified <- lm(religious ~ asinh(adj_impact) + log_pop + rugged + area + density + 
             north_of_madrid + regional_centre_dist_km + historical_mosque_dist_km + capital_dist_km + 
             tangier_distance_km + andorra_distance_km +  + dist_trib + dist_river + 
             dist_road + dist_sea + civil_married + class_upper + age + 
           as.factor(division_1833), 
                    data_modified %>% filter(trib_head == 0))

table_four_b_stat <- coeftest(table_four_b_modified, vcov = vcovHC(table_four_b_modified, type="HC1"))[,3]
table_four_b_p  <- coeftest(table_four_b_modified, vcov = vcovHC(table_four_b_modified, type="HC1"))[,4]

table_four_c_modified <- lm(c_secondplus ~ asinh(adj_impact) + log_pop + rugged + area + density +
             north_of_madrid + regional_centre_dist_km + historical_mosque_dist_km + capital_dist_km + 
             tangier_distance_km + andorra_distance_km + dist_trib + dist_river + 
             dist_road + dist_sea + civil_married + class_upper + age + 
           as.factor(division_1833), 
                    data_modified %>% filter(trib_head == 0))

table_four_c_stat <- coeftest(table_four_c_modified, vcov = vcovHC(table_four_c_modified, type="HC1"))[,3]
table_four_c_p  <- coeftest(table_four_c_modified, vcov = vcovHC(table_four_c_modified, type="HC1"))[,4]

table_four_d_modified <- lm(trust2 ~ asinh(adj_impact) + log_pop + rugged + area + density + 
             north_of_madrid + regional_centre_dist_km + historical_mosque_dist_km + capital_dist_km + 
             tangier_distance_km + andorra_distance_km + dist_trib + dist_river + 
             dist_road + dist_sea + civil_married + class_upper + age + 
           as.factor(division_1833), 
                    data_modified %>% filter(trib_head == 0))

table_four_d_stat <- coeftest(table_four_d_modified, vcov = vcovHC(table_four_d_modified, type="HC1"))[,3]
table_four_d_p  <- coeftest(table_four_d_modified, vcov = vcovHC(table_four_d_modified, type="HC1"))[,4]

star_modified = stargazer(table_four_a_modified, 
                          table_four_b_modified, 
                          table_four_c_modified, 
                          table_four_d_modified, 
                         type = "latex",
                         title = "Panel A: OLS",
                         dep.var.labels = c("log GDP p.c.", "religiosity", "education", "trust"),
                         covariate.labels = c("inquisitorial intensity, transformed", 
                                              "area", "density",
                                               "north of madrid",
                                              "distance to regional centre of importance (kilo mile)",
                                              "distance to historical mosque (kilo mile)",
                                              "distance to capital (kilo mile)",
                                              "distance to tangier, morocco (kilo mile)",
                                              "distance to andorra (kilo mile)", "constant"),
                         keep = c("adj_impact", "area", "density",
                                  "north_of_madrid",
                                  "regional_centre_dist", "historical_mosque_dist",
                                  "capital_dist", "tangier_distance", "andorra_distance",
                                  "Constant"),
                         omit = c("division_1833"),
                         omit.stat = c("ser", "f"),
                         add.lines = list(c("Original Controls", "Yes", "Yes", "Yes", "Yes"),
                                            c("1833 Provinces FE", "Yes", "Yes", "Yes", "Yes")),
                         se = list(table_four_a_stat, table_four_b_stat, table_four_c_stat, table_four_d_stat),
                         p = list(table_four_a_p, table_four_b_p, table_four_c_p, table_four_d_p),
                         header = FALSE)
```

\newpage

```{r, results = "asis"}
# create table 5, with the new data for cem and re-run ols  
# original cem/ols figure is taken from table a2 in the appendix

master_cem_data <- data_modified %>%
  filter(trib_head == 0) %>% 
  select(pop_padron, pop1528, latitude, longitude, rugged, dist_trib, dist_road, 
         dist_river, dist_sea, regional_centre_dist_km, historical_mosque_dist_km, capital_dist_km,  
         impacthigh, adj_impact, log_pop, civil_married,
         class_upper, age, autonomia, log_gdppc, religious, c_secondplus, trust2, division_1833, area,
         north_of_madrid, tangier_distance_km, andorra_distance_km, density, area)

cem_data <- master_cem_data %>% 
  select(pop_padron, pop1528, latitude, longitude, rugged, dist_river, dist_sea,
         impacthigh, regional_centre_dist_km, historical_mosque_dist_km, capital_dist_km,
         tangier_distance_km, andorra_distance_km)

cem_data <- data.frame(cem_data)
master_cem_data <- data.frame(master_cem_data)

table_five_cem <- cem(treatment = "impacthigh", data = cem_data) 

table_five_a_cem <- lm(log_gdppc ~ adj_impact + log_pop + rugged + area + density +
             north_of_madrid + regional_centre_dist_km + historical_mosque_dist_km + capital_dist_km + 
             tangier_distance_km + andorra_distance_km + dist_trib + dist_river + 
             dist_road + dist_sea + civil_married + class_upper + age + 
           as.factor(division_1833), data = master_cem_data,
                    weights = table_five_cem$w)

table_five_a_stat <- coeftest(table_five_a_cem, vcov = vcovHC(table_five_a_cem, type="HC1"))[,3]
table_five_a_p  <- coeftest(table_five_a_cem, vcov = vcovHC(table_five_a_cem, type="HC1"))[,4]

table_five_b_cem <- lm(religious ~ adj_impact + log_pop + rugged + area + density +
             north_of_madrid + regional_centre_dist_km + historical_mosque_dist_km + capital_dist_km + 
             tangier_distance_km + andorra_distance_km + dist_trib + dist_river + 
             dist_road + dist_sea + civil_married + class_upper + age + 
           as.factor(division_1833), data = master_cem_data,
                    weights = table_five_cem$w)

table_five_b_stat <- coeftest(table_five_b_cem, vcov = vcovHC(table_five_b_cem, type="HC1"))[,3]
table_five_b_p  <- coeftest(table_five_b_cem, vcov = vcovHC(table_five_b_cem, type="HC1"))[,4]

table_five_c_cem <- lm(c_secondplus ~ adj_impact + log_pop + rugged + area + density +
             north_of_madrid + regional_centre_dist_km + historical_mosque_dist_km + capital_dist_km + 
             tangier_distance_km + andorra_distance_km + dist_trib + dist_river + 
             dist_road + dist_sea + civil_married + class_upper + age + 
           as.factor(division_1833), data = master_cem_data,
                    weights = table_five_cem$w) 

table_five_c_stat <- coeftest(table_five_c_cem, vcov = vcovHC(table_five_c_cem, type="HC1"))[,3]
table_five_c_p  <- coeftest(table_five_c_cem, vcov = vcovHC(table_five_c_cem, type="HC1"))[,4]

table_five_d_cem <- lm(trust2 ~ adj_impact + log_pop + rugged + area + density +
             north_of_madrid + regional_centre_dist_km + historical_mosque_dist_km + capital_dist_km + 
             tangier_distance_km + andorra_distance_km + dist_trib + dist_river + 
             dist_road + dist_sea + civil_married + class_upper + age + 
           as.factor(division_1833), data = master_cem_data,
                    weights = table_five_cem$w)

table_five_d_stat <- coeftest(table_five_d_cem, vcov = vcovHC(table_five_d_cem, type="HC1"))[,3]
table_five_d_p  <- coeftest(table_five_d_cem, vcov = vcovHC(table_five_d_cem, type="HC1"))[,4]

# observations manually written in stargazer or will print all obs, both matched and unmatched

star_five_cem = stargazer(table_five_a_cem, table_five_b_cem, table_five_c_cem, table_five_d_cem, 
                         title = "Panel B: CEM",
                         keep = ("adj_impact"),
                         dep.var.labels = c("log GDP p.c.", "religiosity", "education", "trust"),
                         add.lines = list(c("Observations", "699", "690", "699", "283")),
                         covariate.labels = c("inquisitorial intensity"),
                         omit.stat = c("adj.rsq", "ser", "f", "n"),
                         type = "latex",
                         se = list(table_five_a_stat, table_five_b_stat, table_five_c_stat, table_five_d_stat),
                         p = list(table_five_a_p, table_five_b_p, table_five_c_p, table_five_d_p),
                         header = FALSE)
```

APPENDIX

```{r, warning = FALSE, message = FALSE}
#### FIGURE A2
zero_fig_two <- data %>% 
  filter(gdppc > 12000 & gdppc < 25000, adj_impact == 0) %>%
  mutate(tercile = "0")
  
figure_two <- data %>% 
  filter(gdppc > 12000 & gdppc < 25000, adj_impact > 0) %>%
  mutate(tercile = as.character(fabricatr::split_quantile(adj_impact, 3))) %>%
  bind_rows(zero_fig_two) %>%
  ggplot(aes(x = gdppc, linetype = tercile)) +
  geom_line(stat = "density") + 
  labs(x = "GDP p.c. in \u20ac", y = "Density") + 
  theme(panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.background = element_blank(), 
        axis.line = element_line(colour = "black"),
        legend.key = element_blank(),
        legend.position = "top",
        axis.text.x = element_text(color = "black"),
        axis.text.y = element_text(color = "black"),
        legend.text = element_text(size = 7),
        legend.key.size = unit(.5, 'cm'),
        legend.title = element_text(size = 9)) +
  xlim(10000, 25000) +
  scale_linetype_manual(labels = c("no impact/missing data",
                                  "lowest tercile, not zero",
                                  "middle tercile",
                                  "highest tercile"),
                        values = c("solid", "dashed", "dotted", "dotdash"),
                        name = "") +
  guides(linetype = guide_legend(nrow = 2 , byrow = TRUE)) + 
  geom_hline(yintercept = 0, colour = "white", size = 1.5) +
  scale_y_continuous(breaks = c(0, .0002, .0004),
                     limits = c(0, .0005),
                     labels = scales::label_number())

figure_two

# ggsave(plot = figure_two, "figure2.png", device = "png")
```

\newpage

```{r, warning = FALSE, message = FALSE, out.width = "80%"}
### Figure A4

# create a plot for seville alone, including labels for hospitals

inq_int_plot <- data %>% 
  select(muni_name, adj_impact, court_geo, hospitals) %>%
  rename(name = muni_name) %>% 
  inner_join(munic %>% filter(!ine.ccaa.name %in% c("Canarias")) 
             %>% select(name, geometry)) %>% 
  filter(court_geo == 6) %>%
  mutate(hospital_label = ifelse(hospitals == 0, "*", "H")) %>% 
  ggplot() +
  geom_sf(aes(geometry = geometry, fill = cut(adj_impact, 
                                              breaks = c(-Inf, 0, .025, .05, .075, .1, 1))), 
          size = .3,
          color = "black") +
  scale_fill_brewer("", palette = "Blues") +
  theme_map() +
  theme(legend.justification = c(0, 0), legend.position = c(0, 0),
        legend.key.size = unit(.6, 'cm'),
        plot.title.position = "plot") +
  scale_y_continuous(limits = c(35, 39)) +
  geom_sf_text((aes(geometry = geometry, label = hospital_label))) +
  labs(title = "Inquisitorial Intensity")

gdp_cap_plot <- data %>% 
  select(muni_name, gdppc, court_geo) %>%
  rename(name = muni_name) %>% 
  inner_join(munic %>% filter(!ine.ccaa.name %in% c("Canarias")) 
             %>% select(name, geometry)) %>% 
  filter(court_geo == 6) %>%
  mutate(gdp_quart = as.factor(ntile(gdppc, 4))) %>%
  ggplot() +
  geom_sf(aes(fill = gdp_quart, geometry = geometry), 
          size = .3,
          color = "black") +
  scale_fill_brewer("", palette = "Blues", labels = c("First quartile", 
                                                      "Second quartile",
                                                      "Third quartile",
                                                      "Fourth quartile")) +
  theme_map() +
  theme(legend.justification = c(0, 0), legend.position = c(0, 0),
        legend.key.size = unit(.6, 'cm'),
        plot.title.position = "plot") +
  scale_y_continuous(limits = c(35, 39)) +
  labs(title = "GDP per Capita")

gridExtra::grid.arrange(inq_int_plot, gdp_cap_plot, nrow = 1)
```

```{r, results = "asis", , include = FALSE}
### Table A2

### ### ### ### ### ### ### ### ### ### ### 
### first, run all the base OLS without matching, and get all the robust standard errors and p-values

table_two_a_ols <- lm(log_gdppc ~ adj_impact + log_pop + latitude + longitude + rugged + 
                    dist_trib + dist_river + dist_road + dist_sea + civil_married + 
                    class_upper + age + as.factor(autonomia) + hospitals, 
                  data %>% filter(trib_head == 0)) 

table_two_a_ols_stat <- coeftest(table_two_a_ols, vcov = vcovHC(table_two_a_ols, type="HC1"))[,3]
table_two_a_ols_p  <- coeftest(table_two_a_ols, vcov = vcovHC(table_two_a_ols, type="HC1"))[,4]

table_two_b_ols <- lm(religious ~ adj_impact + log_pop + latitude + longitude + rugged + 
                    dist_trib + dist_river + dist_road + dist_sea + civil_married + 
                    class_upper + age + as.factor(autonomia) + hospitals, 
                  data %>% filter(trib_head == 0)) 

table_two_b_ols_stat <- coeftest(table_two_b_ols, vcov = vcovHC(table_two_b_ols, type="HC1"))[,3]
table_two_b_ols_p  <- coeftest(table_two_b_ols, vcov = vcovHC(table_two_b_ols, type="HC1"))[,4]

table_two_c_ols <- lm(c_secondplus ~ adj_impact + log_pop + latitude + longitude + rugged + 
                    dist_trib + dist_river + dist_road + dist_sea + civil_married + 
                    class_upper + age + as.factor(autonomia) + hospitals, 
                  data %>% filter(trib_head == 0)) 

table_two_c_ols_stat <- coeftest(table_two_c_ols, vcov = vcovHC(table_two_c_ols, type="HC1"))[,3]
table_two_c_ols_p  <- coeftest(table_two_c_ols, vcov = vcovHC(table_two_c_ols, type="HC1"))[,4]

table_two_d_ols <- lm(trust2 ~ adj_impact + log_pop + latitude + longitude + rugged + 
                    dist_trib + dist_river + dist_road + dist_sea + civil_married + 
                    class_upper + age + as.factor(autonomia) + hospitals, 
                  data %>% filter(trib_head == 0))  

table_two_d_ols_stat <- coeftest(table_two_d_ols, vcov = vcovHC(table_two_d_ols, type="HC1"))[,3]
table_two_d_ols_p  <- coeftest(table_two_d_ols, vcov = vcovHC(table_two_d_ols, type="HC1"))[,4]

star_two_ols = stargazer(table_two_a_ols, table_two_b_ols, table_two_c_ols, table_two_d_ols, 
                         se = list(table_two_a_ols_stat, table_two_b_ols_stat, 
                                   table_two_c_ols_stat, table_two_d_ols_stat), 
                         p = list(table_two_a_ols_p, table_two_b_ols_p, table_two_c_ols_p, table_two_d_ols_p),
                         type = "latex",
                         title = "Panel A: OLS",
                         keep = ("adj_impact"),
                         dep.var.labels = c("log GDP p.c.", "religiosity", "education", "trust"),
                         covariate.labels = c("inquisitorial intensity"),
                         omit.stat = c("adj.rsq", "ser", "f"),
                         header = FALSE)

### ### ### ### ### ### ### ### ### ### ### 
### now, run matching and re-run OLS on matched data

master_cem_data <- data %>%
  filter(trib_head == 0) %>% 
  select(pop_padron, pop1528, latitude, longitude, rugged, dist_trib, dist_road, 
         dist_river, dist_sea,impacthigh, adj_impact, log_pop, civil_married,
         class_upper, age, autonomia, log_gdppc, religious, c_secondplus, trust2)

cem_data <- master_cem_data %>% 
  select(pop_padron, pop1528, latitude, longitude, rugged, dist_river, dist_sea,
         impacthigh)

cem_data <- data.frame(cem_data)
master_cem_data <- data.frame(master_cem_data)

table_two_cem <- cem(treatment = "impacthigh", data = cem_data) 

table_two_a_cem <- lm(log_gdppc ~ adj_impact + log_pop + latitude + longitude + rugged + 
                    dist_trib + dist_river + dist_road + dist_sea + civil_married +
                      class_upper + age + as.factor(autonomia), data = master_cem_data,
                    weights = table_two_cem$w)

table_two_a_cem_stat <- coeftest(table_two_a_cem, vcov = vcovHC(table_two_a_cem, type="HC1"))[,3]
table_two_a_cem_p  <- coeftest(table_two_a_cem, vcov = vcovHC(table_two_a_cem, type="HC1"))[,4]

table_two_b_cem <- lm(religious ~ adj_impact + log_pop + latitude + longitude + rugged + 
                    dist_trib + dist_river + dist_road + dist_sea + civil_married +
                      class_upper + age + as.factor(autonomia), data = master_cem_data, 
                      weight = table_two_cem$w)

table_two_b_cem_stat <- coeftest(table_two_b_cem, vcov = vcovHC(table_two_b_cem, type="HC1"))[,3]
table_two_b_cem_p  <- coeftest(table_two_b_cem, vcov = vcovHC(table_two_b_cem, type="HC1"))[,4]

table_two_c_cem <- lm(c_secondplus ~ adj_impact + log_pop + latitude + longitude + rugged + 
                    dist_trib + dist_river + dist_road + dist_sea + civil_married +
                      class_upper + age + as.factor(autonomia), data = master_cem_data, 
                      weight = table_two_cem$w) 

table_two_c_cem_stat <- coeftest(table_two_c_cem, vcov = vcovHC(table_two_c_cem, type="HC1"))[,3]
table_two_c_cem_p  <- coeftest(table_two_c_cem, vcov = vcovHC(table_two_c_cem, type="HC1"))[,4]

table_two_d_cem <- lm(trust2 ~ adj_impact + log_pop + latitude + longitude + rugged + 
                    dist_trib + dist_river + dist_road + dist_sea + civil_married +
                      class_upper + age + as.factor(autonomia), data = master_cem_data, 
                      weight = table_two_cem$w)

table_two_d_cem_stat <- coeftest(table_two_d_cem, vcov = vcovHC(table_two_d_cem, type="HC1"))[,3]
table_two_d_cem_p  <- coeftest(table_two_d_cem, vcov = vcovHC(table_two_d_cem, type="HC1"))[,4]

star_two_cem = stargazer(table_two_a_cem, table_two_b_cem, table_two_c_cem, table_two_d_cem, 
                         se = list(table_two_a_cem_stat, table_two_b_cem_stat, 
                                   table_two_c_cem_stat, table_two_d_cem_stat), 
                         p = list(table_two_a_cem_p, table_two_b_cem_p, table_two_c_cem_p, table_two_d_cem_p),
                         title = "Panel B: CEM",
                         keep = ("adj_impact"),
                         dep.var.labels = c("log GDP p.c.", "religiosity", "education", "trust"),
                         covariate.labels = c("inquisitorial intensity"),
                         omit.stat = c("adj.rsq", "ser", "f", "n"),
                          add.lines = list(c("Observations", "1228", "1216", "1229", "496")),
                         type = "latex",
                         header = FALSE)
```

```{r, results = "asis", fig.show = "hold", out.width = "100%"}
cat(star_two_ols)
cat(star_two_cem)
```
